###################################################
#June 2018
#Fly genotype analyses prepared by Genevera Allen
#For the Botas & Shulman Labs

#############
#Changes from April 2017 version:
#1) Added in a test for differences at a single trial day.  The default is to use the first trial day recorded.
#2) Added in a test for a mean shift in genotypes, but no change over time.  Also did post-hoc pairwise Genotype comparisons for this.
#3) Now report pairwise genotype tests for all 3 longitudinal models: genotype only (no time trend), genotype + mean shift non-linear time trend, and interaction between genotype and time (e.g. each genotype has a different longitudinal curve)
##############


############################################################
#Note: The input file is not hard-coded. When the script runs, file.choose() opens a file dialog; select the CSV file of the new data set there.
#############################################################

# Ask the user to pick the input CSV interactively, then load it.
data_path <- file.choose()
data <- read.csv(data_path)


####################################################
# Preliminaries - check that the values shown below are correct.
# The distinct values of the Metric and Genotype columns drive every
# analysis further down in the script.
outcomes <- unique(data$Metric)
gens <- unique(data$Genotype)
# Outcome metrics found in the data
outcomes
# Genotypes found in the data
gens


########
require(reshape2)
require(splines)
require(lme4)
require(ggplot2)
require(knitr)
require(multcomp)
# Per-metric p-values from the models fitted to all genotypes at once
# (one entry per element of `outcomes`, filled in by the loop below):
#   pvgene  - genotype main effect, no time trend
#   pvgtime - third row of the sequential model comparison
#   pvfull  - fourth row: interaction between the time trend and genotype
pvgene <- numeric(length(outcomes))
pvgtime <- numeric(length(outcomes))
pvfull <- numeric(length(outcomes))

# Unadjusted pairwise p-values pooled over all metrics.  Entries are appended
# metric by metric, and within a metric in the pair order of nams1/nams2; the
# summary section after the loop relies on this ordering.
ppvgene <- NULL
ppvgtime <- NULL
ppvfull <- NULL

# Name of the p-value column in the likelihood-ratio tables returned by
# anova().  Looking the column up by name avoids depending on its position.
pcol <- "Pr(>Chisq)"

# Pairwise post-hoc comparisons are only run when there are more than two
# genotypes.  The pairs do not depend on the metric, so they are built once.
# combn() enumerates index pairs as (1,2), (1,3), ..., (2,3), ...
run_pairwise <- length(gens) > 2
if (run_pairwise) {
  gen_pairs <- combn(length(gens), 2)
  nams1 <- as.character(gens[gen_pairs[1, ]])
  nams2 <- as.character(gens[gen_pairs[2, ]])
}


# Loop through the outcomes (seq_along() is safe when `outcomes` is empty)
for (i in seq_along(outcomes)) {

  ind <- which(data$Metric == outcomes[i])

  # Plot the data: raw points, a smoothed B-spline trend per genotype, and a
  # faint line per Genotype/Replicate combination.  Each metric gets its own
  # graphics device.
  # NOTE(review): method = "rlm" is presumably MASS::rlm; MASS is not loaded
  # explicitly in this script - confirm it is attached when this runs.
  dev.new()

  print(
    ggplot(data[ind, ], aes(x = Trialday, y = Observation, colour = Genotype)) +
      geom_point(aes(color = Genotype)) +
      stat_smooth(method = "rlm", formula = y ~ bs(x), se = FALSE, size = 2) +
      ggtitle(outcomes[i]) +
      geom_line(aes(group = paste(Genotype, Replicate)), alpha = .3)
  )

  # Longitudinal tests

  # Nested longitudinal mixed effects models, all with a random intercept per
  # Replicate: null, time trend only, genotype only, additive, interaction.
  print(paste0(outcomes[i], ": Longitudinal Genotype Tests"))
  M0 <- lmer(Observation ~ (1 | Replicate), data = data[ind, ])
  Mtime <- lmer(Observation ~ (1 | Replicate) + bs(Trialday), data = data[ind, ])
  Mgene <- lmer(Observation ~ (1 | Replicate) + Genotype, data = data[ind, ])
  Mgenetime <- lmer(Observation ~ (1 | Replicate) + bs(Trialday) + Genotype,
                    data = data[ind, ])
  Mfull <- lmer(Observation ~ (1 | Replicate) + bs(Trialday) * Genotype,
                data = data[ind, ])
  res1 <- anova(M0, Mgene)
  print(res1)
  res2 <- anova(M0, Mtime)
  # The sequential comparison enters whichever single term (genotype or time)
  # has the smaller p-value against the null model first.
  if (res1[2, pcol] < res2[2, pcol]) {
    resf <- anova(M0, Mgene, Mgenetime, Mfull)
  } else {
    resf <- anova(M0, Mtime, Mgenetime, Mfull)
  }
  print(resf)
  pvgene[i] <- res1[2, pcol]
  pvgtime[i] <- resf[3, pcol]
  pvfull[i] <- resf[4, pcol]

  ######
  ## Here, we report the results of fitting the longitudinal mixed effects
  ## models.  Three p-values are reported and from top to bottom correspond
  ## to the p-value for the genotype main effect (no time trend), the
  ## genotype + non-linear time trend main effects, and the interaction
  ## between the non-linear time trend and genotype respectively.  Note that
  ## the latter can be interpreted as testing whether the genotype curves
  ## over time are different.
  #######

  # Post-hoc Pairwise Genotype Tests
  if (run_pairwise) {
    print(paste0(outcomes[i], ": Longitudinal Genotype Tests - Pairwise"))
    n_pairs <- ncol(gen_pairs)
    gpvgene <- numeric(n_pairs)
    gpvgtime <- numeric(n_pairs)
    gpvfull <- numeric(n_pairs)

    for (k in seq_len(n_pairs)) {
      j <- gen_pairs[1, k]
      jj <- gen_pairs[2, k]
      # Rows of the current metric that belong to either genotype of the pair
      indg <- which(data$Metric == outcomes[i] &
                      (data$Genotype == gens[j] | data$Genotype == gens[jj]))

      # Same sequence of models as above, restricted to the two genotypes
      gM0 <- lmer(Observation ~ (1 | Replicate), data = data[indg, ])
      gMtime <- lmer(Observation ~ (1 | Replicate) + bs(Trialday),
                     data = data[indg, ])
      gMgene <- lmer(Observation ~ (1 | Replicate) + Genotype,
                     data = data[indg, ])
      gMgenetime <- lmer(Observation ~ (1 | Replicate) + bs(Trialday) + Genotype,
                         data = data[indg, ])
      gMfull <- lmer(Observation ~ (1 | Replicate) + bs(Trialday) * Genotype,
                     data = data[indg, ])
      gres1 <- anova(gM0, gMgene)
      gres2 <- anova(gM0, gMtime)
      if (gres1[2, pcol] < gres2[2, pcol]) {
        gresf <- anova(gM0, gMgene, gMgenetime, gMfull)
      } else {
        gresf <- anova(gM0, gMtime, gMgenetime, gMfull)
      }
      gpvgene[k] <- gres1[2, pcol]
      gpvgtime[k] <- gresf[3, pcol]
      gpvfull[k] <- gresf[4, pcol]
    }

    # Keep the unadjusted values for the across-metric adjustment done after
    # the loop.
    ppvgene <- c(ppvgene, gpvgene)
    ppvgtime <- c(ppvgtime, gpvgtime)
    ppvfull <- c(ppvfull, gpvfull)

    # Pairwise genotype tests for this metric, Holm-adjusted over the pairs.

    # for genotype only model (no time trend)
    print("Genotype")
    adj.p.vals <- p.adjust(gpvgene, method = "holm")
    print(data.frame(nams1, nams2, adj.p.vals))

    # for genotype + non-linear time trend
    print("Genotype + Time")
    adj.p.vals <- p.adjust(gpvgtime, method = "holm")
    print(data.frame(nams1, nams2, adj.p.vals))

    # for genotype * non-linear time trend (interaction model)
    print("Genotype * Time")
    adj.p.vals <- p.adjust(gpvfull, method = "holm")
    print(data.frame(nams1, nams2, adj.p.vals))
  }
}
     
#########
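# Inside the loop above, we report the pairwise post-hoc tests (run only when there are more than two genotypes) for every possible pair of genotypes and for each of the three longitudinal models: genotype only, genotype + time trend, and genotype-by-time interaction.  The last of these tests whether the two genotype curves are different.  Note that the reported p-values are already adjusted for multiplicity (over the pairs, within each metric) using Holm's procedure.  The p-values correspond to testing the difference between the two genotypes given in the first two columns.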
##########


######################################################
#####################################################
#Results for All Metrics Adjusted for Multiplicity

print("Results for all metrics adjusted for multiplicity")

# Full models (all genotype groups).  One table per test, in this order:
# genotype only (no time trend), genotype + non-linear time trend, and the
# interaction between genotype and the time trend.  Within each table the
# p-values are Holm-adjusted across the metrics.
metric_pvals <- list(
  "Genotype" = pvgene,
  "Genotype + Time" = pvgtime,
  "Genotype * Time" = pvfull
)
for (test_label in names(metric_pvals)) {
  print(test_label)
  adj.p.vals <- p.adjust(metric_pvals[[test_label]], method = "holm")
  print(data.frame(outcomes, adj.p.vals))
}


############
#models comparing pairs of genotypes
if (length(gens) > 2) {
  print("Results for all metrics adjusted for multiplicity - Pairwise")

  # Label columns laid out to match the pooled p-value vectors: all pairs of
  # the first metric, then all pairs of the second metric, and so on.
  pairs_per_metric <- length(nams1)
  n_metrics <- length(outcomes)
  ppvouts <- rep(outcomes, each = pairs_per_metric)
  ppvnams1 <- rep(nams1, n_metrics)
  ppvnams2 <- rep(nams2, n_metrics)

  # One table per test, in this order: genotype only (no time trend),
  # genotype + non-linear time trend, and the interaction between genotype
  # and the time trend.  Holm's adjustment is applied jointly over every
  # metric/pair combination in a table.
  pairwise_pvals <- list(
    "Genotype" = ppvgene,
    "Genotype + Time" = ppvgtime,
    "Genotype * Time" = ppvfull
  )
  for (test_label in names(pairwise_pvals)) {
    print(test_label)
    adj.p.vals <- p.adjust(pairwise_pvals[[test_label]], method = "holm")
    print(data.frame(Metric = ppvouts, Gene1 = ppvnams1, Gene2 = ppvnams2,
                     adj.p.vals))
  }
}



#####
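#Here, we report p-values across all metrics that have been adjusted for multiple testing.  Three tables are given, one per test: the genotype main effect (no time trend), genotype + non-linear time trend, and the interaction between the non-linear trend and genotype (i.e. testing for differences in the genotype curves) for each metric.  When there are more than two genotypes, the same three tables are also given for the pairwise genotype comparisons, adjusted jointly over all metrics and pairs.  The reported p-values have already been adjusted for multiplicity using Holm's procedure.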
#######
